# encoding=utf-8

import re

# Double-quoted absolute http(s)/ftp(s) URLs; the single capture group is the
# URL itself, so findall() yields plain strings.
_QUOTED_URL_PATTERN = re.compile(r'"((?:http|ftp)s?://.*?)"')

# File extensions of static resources that are not worth following.
_SKIPPED_EXTENSIONS = (".css", ".js", ".dtd")


def _is_static_resource(url):
    """Return True if the path of *url* ends with a skipped extension."""
    # Drop the fragment and query so "app.js?v=3" is still seen as ".js".
    without_fragment = url.split("#", 1)[0]
    without_query = without_fragment.split("?", 1)[0]
    # Look only at the path: a host such as "www.cssforum.com" must not be
    # mistaken for a stylesheet.
    after_scheme = without_query.partition("://")[2]
    path = after_scheme.partition("/")[2]
    return path.lower().endswith(_SKIPPED_EXTENSIONS)


def get_all_urls(urlData):
    """Extract all child URLs from a chunk of page text.

    Every double-quoted ``http(s)://`` or ``ftp(s)://`` URL found in
    *urlData* is returned in order of appearance, except URLs whose path
    points at a ``.css``, ``.js`` or ``.dtd`` resource (compared
    case-insensitively, ignoring any query string or fragment).

    Args:
        urlData: Text to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of URL strings; duplicates are preserved.
    """
    if not urlData:
        return []
    return [
        url
        for url in _QUOTED_URL_PATTERN.findall(urlData)
        if not _is_static_resource(url)
    ]
